library(data.table)
library(estimatr)
library(interflex)
library(dplyr)
library(ggplot2)
library(texreg)
library(xtable)
library(here)

# Read the raw survey export; empty cells are treated as missing.
data <- fread(here('data', 'Fed Legitimacy Survey_February 7, 2022_07.47.csv'),
              na.strings = "")

# Drop the first two rows of the export.
# NOTE(review): presumably these are the extra descriptive header rows that
# the survey platform writes below the column names -- confirm.
# tail(x, -2L) keeps "all but the first two rows" and, unlike 3:nrow(data),
# cannot count backwards (3:2, 3:1) when the file has fewer than three rows.
data <- tail(data, -2L)

# Consent and attention checks --------------------------------------------

# Remove respondents who do not consent or did not complete the survey.
# subset() also drops rows where either field is missing.
data <- subset(data, consent_question == "I consent, begin the study.")
data <- subset(data, Finished == "True")

# attentive = 1 only when both attention-check items have the expected answer.
# %in% is used instead of == so that a skipped item (NA) always counts as a
# failed check (0); with ==, a missing answer produced NA in some rows and 0
# in others depending on the other item.
data$attentive <- ifelse(data$atten_thank %in% "I understand." &
                           data$atten_interest %in%
                           "Extremely interested,Very interested",
                         1, 0)

# Sequential respondent id over the retained rows. seq_len() yields an empty
# vector (rather than c(1, 0)) if no rows survive the filters above.
data$id <- seq_len(nrow(data))

# Drop unused -------------------------------------------------------------
# Survey metadata and location columns that are removed before cleaning.
unused_cols <- c('StartDate', 'EndDate', 'Status', 'IPAddress', 'Progress',
                 'Duration (in seconds)', 'Finished', 'RecordedDate',
                 'ResponseId', 'RecipientLastName', 'RecipientFirstName',
                 'RecipientEmail', 'ExternalReference', 'LocationLatitude',
                 'LocationLongitude', 'DistributionChannel', 'UserLanguage',
                 'region', 'zip')

# Negated character vector in j = "every column except these".
data <- data[, !unused_cols, with = FALSE]
rm(unused_cols)


# Clean Demographic Data --------------------------------------------------
# Gender
# Indicator with female = 1, male = 0, obtained by shifting the gender code
# down by one.
# NOTE(review): assumes gender is coded 1 = male, 2 = female -- confirm.
gender_code <- as.integer(data$gender)
data$female <- gender_code - 1
rm(gender_code)

# Partisanship
# Three 0/1 indicators derived from the political_party code. %in% never
# returns NA, so a missing party code is 0 on all three.
party_code <- data$political_party
data$democrat <- as.numeric(party_code %in% c(1:3, 6))
data$republican <- as.numeric(party_code %in% c(5, 8:10))
data$independent <- as.numeric(party_code %in% c(4, 7))
rm(party_code)

# Education
# The code -3105 is treated as missing; missing values are then replaced by
# the rounded mean of the observed education codes.
educ_code <- data$education
educ_code[educ_code == -3105] <- NA
educ_code <- as.integer(educ_code)
educ_code[is.na(educ_code)] <- round(mean(educ_code, na.rm = TRUE))
data$education <- educ_code

# Finished associates/bachelor == 1, else 0 (education code of 5 or above).
data$college <- as.numeric(as.integer(educ_code) >= 5)
rm(educ_code)

# Stock
# Own stock = 1, otherwise 0; a missing answer stays NA.
data$stock <- as.numeric(data$dem_stock == 'Yes')

# Age
data$age <- as.integer(data$age)

# Label each respondent with the lower bound of their age bracket:
# 18-24 -> "18", 25-39 -> "25", 40-59 -> "40", 60+ -> "60".
# Ages below 18 and missing ages get NA. Intervals are closed on the left so
# the integer ages fall in the same brackets as the inclusive ranges above.
data <- mutate(data,
               ageBin = as.character(cut(age,
                                         breaks = c(18, 25, 40, 60, Inf),
                                         labels = c("18", "25", "40", "60"),
                                         right = FALSE)))

# One 0/1 column per age bracket; rows with NA ageBin are not given their own
# dummy column.
data <- fastDummies::dummy_cols(data, 'ageBin', ignore_na = TRUE)

# HHI
# Show the raw distribution of the hhi codes before any recoding.
table(data$hhi)

# Impute missing data with mean: the code -3105 is treated as missing, then
# every missing value is replaced by the rounded mean of the observed codes.
hhi_code <- data$hhi
hhi_code[hhi_code == -3105] <- NA
hhi_code <- as.integer(hhi_code)
hhi_code[is.na(hhi_code)] <- round(mean(hhi_code, na.rm = TRUE))
data$hhi <- hhi_code
rm(hhi_code)

# Collapse the codes into four brackets:
# 1-8 -> "49", 9-18 -> "99", 19-20 -> "149", 21+ -> "150".
# NOTE(review): labels look like income bounds in thousands -- confirm
# against the survey codebook.
data <- mutate(data,
               hhiBin = as.character(cut(hhi,
                                         breaks = c(1, 9, 19, 21, Inf),
                                         labels = c("49", "99", "149", "150"),
                                         right = FALSE)))

# One 0/1 column per income bracket.
data <- fastDummies::dummy_cols(data, 'hhiBin', ignore_na = TRUE)

# Fed Lending Variables ---------------------------------------------------

# TREATMENT VARIABLES
# Collapse outcomes from different treatments into single variable.
# Each treatment arm stores its answer in its own column, so the column that
# holds a non-missing answer identifies the arm the respondent received.

treatments <- c('fed_treat_lend', 'fed_treat_lend_china',
                'fed_treat_risk', 'fed_treat_risk_china',
                'fed_treat_mh', 'fed_treat_mh_china',
                'fed_treat_dem', 'fed_treat_dem_china')

# Fail early, with a readable message, if the export lacks a treatment column.
absent_cols <- setdiff(treatments, names(data))
if (length(absent_cols) > 0) {
  stop("Treatment column(s) missing from data: ",
       paste(absent_cols, collapse = ", "), call. = FALSE)
}
rm(absent_cols)

# Create variable indicating which treatment was received, together with the
# answer given under that treatment. Working one column at a time (8 passes)
# replaces the previous row-by-row loop, which copied the columns on every
# iteration and misbehaved on an empty table (1:0 iterates over 1 and 0).
assigned_treatment <- rep(NA_character_, nrow(data))
policy_response <- rep(NA_character_, nrow(data))
n_answered <- integer(nrow(data))

for (trt in treatments) {
  response <- as.character(data[[trt]])
  answered <- !is.na(response)
  n_answered <- n_answered + answered
  # Only fill rows that have not been matched to an earlier treatment, i.e.
  # keep the first non-missing answer in the order given by `treatments`.
  take <- answered & is.na(assigned_treatment)
  assigned_treatment[take] <- trt
  policy_response[take] <- response[take]
}

# A respondent with no treatment answer previously aborted the loop with
# "replacement has length zero"; keep it an error, but say what is wrong.
if (any(n_answered == 0L)) {
  stop(sum(n_answered == 0L),
       " respondent(s) have no answer in any treatment column.",
       call. = FALSE)
}
# More than one answer is unexpected; as before, the first one is used.
if (any(n_answered > 1L)) {
  warning(sum(n_answered > 1L),
          " respondent(s) answered more than one treatment question; ",
          "using the first non-missing answer.",
          call. = FALSE)
}

data$fed_treatment <- factor(assigned_treatment, levels = treatments)
data$outcome_policy <- policy_response
rm(trt, response, answered, take, n_answered,
   assigned_treatment, policy_response)

# Create policy indicator: the treatment name with the '_china' suffix removed
data$fed_treatment_policy <- gsub('_china', '', data$fed_treatment,
                                  fixed = TRUE)
data$fed_treatment_policy <- factor(data$fed_treatment_policy, 
                                    levels = c('fed_treat_lend', 'fed_treat_mh',
                                               'fed_treat_dem', 'fed_treat_risk'))
# Cross-tabulate policy against full treatment as a sanity check
table(data$fed_treatment_policy, data$fed_treatment)

# Create China indicator: 1 when the treatment name contains 'china'
data$fed_treatment_china <- ifelse(grepl('china', 
                                         data$fed_treatment, 
                                         fixed = TRUE),
                                   1, 0)

# OUTCOME VARIABLES
# Convert trust to integer
data$outcome_trust <- as.integer(data$fed_trust_1)

# Six-point policy scale, from strongest opposition (1) to strongest
# support (6).
policy_scale <- c('Strongly oppose' = 1L,
                  'Oppose' = 2L,
                  'Slightly oppose' = 3L,
                  'Slightly support' = 4L,
                  'Support' = 5L,
                  'Strongly support' = 6L)

# Policy support indicator: 1 for any of the three "support" answers, else 0
data$outcome_policy_support <- as.numeric(
  data$outcome_policy %in% c('Strongly support', 'Support', 'Slightly support')
)

# Policy full scale: splice the lookup into recode() as old = new pairs
data$outcome_policy_int <- recode(data$outcome_policy, !!!policy_scale)
rm(policy_scale)

# Create nationalism index ------------------------------------------------
# Each item is scored 0-3 by its position in an answer vector ordered from
# the lowest-scoring to the highest-scoring response (match() position minus
# one). Answers not in the vector, and missing answers, score NA.

# Nat1
# How many things about America make you ashamed?
# Very many (0), many (1), not many (2), none (3)
data$nat1_int <- match(data$nat1,
                       c('Very many', 'Many', 'Not many', 'None')) - 1

# Nat2
# How superior is the United States compared to other nations?
# Not at all superior (0), not so superior (1), very superior (2),
# vastly superior (3)
data$nat2_int <- match(data$nat2,
                       c('Not at all superior', 'Not so superior',
                         'Very superior', 'Vastly superior')) - 1

# Nat 3
# I would rather be a citizen of America than of any other country in the world.
# Strongly disagree (0), somewhat disagree (1), somewhat agree (2),
# strongly agree (3)
data$nat3_int <- match(data$nat3,
                       c('Strongly disagree', 'Somewhat disagree',
                         'Somewhat agree', 'Strongly agree')) - 1

# Sum of the three items divided by the maximum possible total (3 items x 3
# points), giving an index between 0 and 1; NA if any item is NA.
data$nat_total_int <- with(data, (nat1_int + nat2_int + nat3_int) / 9)

# Hand the cleaned table over as survey1 and remove the working objects
survey1 <- data
rm(data, treatments)